/**
 * Copyright (C) 2007 The AsyncMail Group <asyncmail@googlegroups.com>
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *         http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.googlecode.asyncmail.smtpserver.contenthandler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.UnknownHostException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.james.mime4j.AbstractContentHandler;
import org.apache.james.mime4j.BodyDescriptor;
import org.springframework.beans.factory.annotation.Required;

import com.googlecode.asyncmail.dnsservice.DNSService;

// TODO: Improve regex for domain extraction
public class SurblContentHandler extends AbstractContentHandler {

    private boolean listed;

    private final static String HTTP_URL_REGEX = ".*http:\\/\\/(.*)";

    private final static String IP_REGEX = "^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$";

    private final static String DOMAIN_REGEX = ".*\\.(.+\\.[A-Z]{2,6})$";

    // ccTLDs regex
    private final static String TLD_REGEX = "(.*\\.)(2000\\.hu|ab\\.ca|ab\\.se|abo\\.pa|ac\\.ae|ac\\.at|ac\\.be|ac\\.cn|ac\\.com|ac\\.cr|ac\\.cy|ac\\.fj|ac\\.fk|ac\\.gg|ac\\.gn|ac\\.id|ac\\.il|ac\\.im|ac\\.in|ac\\.ir|ac\\.je|ac\\.jp|ac\\.ke|ac\\.kr|ac\\.ma|ac\\.mw|ac\\.ng|ac\\.nz|ac\\.om|ac\\.pa|ac\\.pg|ac\\.ru|ac\\.rw|ac\\.se|ac\\.th|ac\\.tj|ac\\.tz|ac\\.ug|ac\\.uk|ac\\.vn|ac\\.yu|ac\\.za|ac\\.zm|ac\\.zw|act\\.au|ad\\.jp|adm\\.br|adult\\.ht|adv\\.br|adygeya\\.ru|aero\\.mv|aero\\.tt|aeroport\\.fr|agr\\.br|agrar\\.hu|agro\\.pl|ah\\.cn|aichi\\.jp|aid\\.pl|ak\\.us|akita\\.jp|al\\.us|aland\\.fi|alderney\\.gg|alt\\.na|alt\\.za|altai\\.ru|am\\.br|amur\\.ru|amursk\\.ru|aomori\\.jp|ar\\.us|arkhangelsk\\.ru|army\\.mil|arq\\.br|art\\.br|art\\.do|art\\.dz|art\\.ht|art\\.pl|arts\\.co|arts\\.ro|arts\\.ve|asn\\.au|asn\\.lv|ass\\.dz|assedic\\.fr|assn\\.lk|asso\\.dz|asso\\.fr|asso\\.gp|asso\\.ht|asso\\.mc|asso\\.re|astrakhan\\.ru|at\\.tt|atm\\.pl|ato\\.br|au\\.com|au\\.tt|auto\\.pl|av\\.tr|avocat\\.fr|avoues\\.fr|az\\.us|baikal\\.ru|barreau\\.fr|bashkiria\\.ru|bbs\\.tr|bc\\.ca|bd\\.se|be\\.tt|bel\\.tr|belgie\\.be|belgorod\\.ru|bib\\.ve|bio\\.br|bir\\.ru|biz\\.az|biz\\.cy|biz\\.et|biz\\.fj|biz\\.mv|biz\\.nr|biz\\.om|biz\\.pk|biz\\.pl|biz\\.pr|biz\\.tj|biz\\.tr|biz\\.tt|biz\\.vn|bj\\.cn|bl\\.uk|bmd\\.br|bolt\\.hu|bourse\\.za|br\\.com|brand\\.se|british-library\\.uk|bryansk\\.ru|buryatia\\.ru|c\\.se|ca\\.tt|ca\\.us|casino\\.hu|cbg\\.ru|cci\\.fr|ch\\.vu|chambagri\\.fr|chel\\.ru|chelyabinsk\\.ru|cherkassy\\.ua|chernigov\\.ua|chernovtsy\\.ua|chiba\\.jp|chirurgiens-dentistes\\.fr|chita\\.ru|chukotka\\.ru|chuvashia\\.ru|cim\\.br|city\\.hu|city\\.za|ck\\.ua|club\\.tw|cmw\\.ru|cn\\.com|cn\\.ua|cng\\.br|cnt\\.br|co\\.ae|co\\.ag|co\\.ao|co\\.at|co\\.bw|co\\.ck|co\\.cr|co\\.dk|co\\.fk|co\\.gg|co\\.hu|co\\.id|co\\.il|co\\.im|co\\.in|co\\.ir|co\\.je|co\\.jp|co\\.ke|co\\.kr|co\\.ls|co\\.ma|co\\.mu|co\\.mw|co\\.nz|co\\.om|co\\.rw|co\\.st|co\\.th|co\\.tj|co\\.tt|co\\.tv|co\\.tz|co\\.ug|co\\.uk|co\\.us|co\\.ve|co\\.vi|co\\.yu|co\\.za|co\\.zm|co\\.zw|com\\.ac|com\\.ae|com\\.af|com\\.ag|com\\.ai|com\\.al|com\\.an|com\\.ar|com\\.au|com\\.aw|com\\.az|com\\.bb|com\\.bd|com\\.bh|com\\.bm|com\\.bn|com\\.bo|com\\.br|com\\.bs|com\\.bt|com\\.bz|com\\.cd|com\\.ch|com\\.cn|com\\.co|com\\.cu|com\\.cy|com\\.dm|com\\.do|com\\.dz|com\\.ec|com\\.ee|com\\.eg|com\\.er|com\\.es|com\\.et|com\\.fj|com\\.fk|com\\.fr|com\\.ge|com\\.gh|com\\.gi|com\\.gn|com\\.gp|com\\.gr|com\\.gt|com\\.gu|com\\.hk|com\\.hn|com\\.hr|com\\.ht|com\\.io|com\\.jm|com\\.jo|com\\.kg|com\\.kh|com\\.kw|com\\.ky|com\\.kz|com\\.la|com\\.lb|com\\.lc|com\\.li|com\\.lk|com\\.lr|com\\.lv|com\\.ly|com\\.mg|com\\.mk|com\\.mm|com\\.mn|com\\.mo|com\\.mt|com\\.mu|com\\.mv|com\\.mw|com\\.mx|com\\.my|com\\.na|com\\.nc|com\\.ng|com\\.ni|com\\.np|com\\.nr|com\\.om|com\\.pa|com\\.pe|com\\.pf|com\\.pg|com\\.ph|com\\.pk|com\\.pl|com\\.pr|com\\.ps|com\\.pt|com\\.py|com\\.qa|com\\.re|com\\.ro|com\\.ru|com\\.rw|com\\.sa|com\\.sb|com\\.sc|com\\.sd|com\\.sg|com\\.sh|com\\.st|com\\.sv|com\\.sy|com\\.tj|com\\.tn|com\\.tr|com\\.tt|com\\.tw|com\\.ua|com\\.uy|com\\.ve|com\\.vi|com\\.vn|com\\.vu|com\\.ws|com\\.ye|conf\\.au|conf\\.lv|consulado\\.st|coop\\.br|coop\\.ht|coop\\.mv|coop\\.mw|coop\\.tt|cpa\\.pro|cq\\.cn|cri\\.nz|crimea\\.ua|csiro\\.au|ct\\.us|cul\\.na|cv\\.ua|d\\.se|dagestan\\.ru|dc\\.us|de\\.com|de\\.net|de\\.tt|de\\.us|de\\.vu|dk\\.org|dk\\.tt|dn\\.ua|dnepropetrovsk\\.ua|dni\\.us|dns\\.be|donetsk\\.ua|dp\\.ua|dpn\\.br|dr\\.tr|dudinka\\.ru|e-burg\\.ru|e\\.se|e164\\.arpa|ebiz\\.tw|ecn\\.br|ed\\.ao|ed\\.cr|ed\\.jp|edu\\.ac|edu\\.af|edu\\.ai|edu\\.al|edu\\.an|edu\\.ar|edu\\.au|edu\\.az|edu\\.bb|edu\\.bd|edu\\.bh|edu\\.bm|edu\\.bn|edu\\.bo|edu\\.br|edu\\.bt|edu\\.ck|edu\\.cn|edu\\.co|edu\\.cu|edu\\.dm|edu\\.do|edu\\.dz|edu\\.ec|edu\\.eg|edu\\.er|edu\\.es|edu\\.et|edu\\.ge|edu\\.gh|edu\\.gi|edu\\.gp|edu\\.gr|edu\\.gt|edu\\.gu|edu\\.hk|edu\\.hn|edu\\.ht|edu\\.in|edu\\.jm|edu\\.jo|edu\\.kg|edu\\.kh|edu\\.kw|edu\\.ky|edu\\.kz|edu\\.lb|edu\\.lc|edu\\.lk|edu\\.lr|edu\\.lv|edu\\.ly|edu\\.mg|edu\\.mm|edu\\.mn|edu\\.mo|edu\\.mt|edu\\.mv|edu\\.mw|edu\\.mx|edu\\.my|edu\\.na|edu\\.ng|edu\\.ni|edu\\.np|edu\\.nr|edu\\.om|edu\\.pa|edu\\.pe|edu\\.pf|edu\\.ph|edu\\.pk|edu\\.pl|edu\\.pr|edu\\.ps|edu\\.pt|edu\\.py|edu\\.qa|edu\\.ru|edu\\.rw|edu\\.sa|edu\\.sb|edu\\.sc|edu\\.sd|edu\\.sg|edu\\.sh|edu\\.sk|edu\\.st|edu\\.sv|edu\\.tj|edu\\.tr|edu\\.tt|edu\\.tw|edu\\.ua|edu\\.uk|edu\\.uy|edu\\.ve|edu\\.vi|edu\\.vn|edu\\.vu|edu\\.ws|edu\\.ye|edu\\.yu|edu\\.za|edunet\\.tn|ehime\\.jp|ekloges\\.cy|embaixada\\.st|eng\\.br|ens\\.tn|ernet\\.in|erotica\\.hu|erotika\\.hu|es\\.tt|esp\\.br|etc\\.br|eti\\.br|eu\\.com|eu\\.org|eu\\.tt|eun\\.eg|experts-comptables\\.fr|f\\.se|fam\\.pk|far\\.br|fareast\\.ru|fax\\.nr|fed\\.us|fgov\\.be|fh\\.se|fhs\\.no|fhsk\\.se|fhv\\.se|fi\\.cr|fie\\.ee|film\\.hu|fin\\.ec|fin\\.tn|firm\\.co|firm\\.ht|firm\\.in|firm\\.ro|firm\\.ve|fj\\.cn|fl\\.us|fm\\.br|fnd\\.br|folkebibl\\.no|forum\\.hu|fot\\.br|fr\\.tt|fr\\.vu|from\\.hr|fst\\.br|fukui\\.jp|fukuoka\\.jp|fukushima\\.jp|fylkesbibl\\.no|g\\.se|g12\\.br|ga\\.us|game\\.tw|games\\.hu|gb\\.com|gb\\.net|gc\\.ca|gd\\.cn|geek\\.nz|gen\\.in|gen\\.nz|gen\\.tr|geometre-expert\\.fr|ggf\\.br|gifu\\.jp|gmina\\.pl|go\\.cr|go\\.id|go\\.jp|go\\.ke|go\\.kr|go\\.th|go\\.tj|go\\.tz|go\\.ug|gob\\.bo|gob\\.do|gob\\.es|gob\\.gt|gob\\.hn|gob\\.mx|gob\\.ni|gob\\.pa|gob\\.pe|gob\\.pk|gob\\.sv|gok\\.pk|gon\\.pk|gop\\.pk|gos\\.pk|gouv\\.fr|gouv\\.ht|gouv\\.rw|gov\\.ac|gov\\.ae|gov\\.af|gov\\.ai|gov\\.al|gov\\.ar|gov\\.au|gov\\.az|gov\\.bb|gov\\.bd|gov\\.bf|gov\\.bh|gov\\.bm|gov\\.bo|gov\\.br|gov\\.bt|gov\\.by|gov\\.ch|gov\\.ck|gov\\.cn|gov\\.co|gov\\.cu|gov\\.cx|gov\\.cy|gov\\.dm|gov\\.do|gov\\.dz|gov\\.ec|gov\\.eg|gov\\.er|gov\\.et|gov\\.fj|gov\\.fk|gov\\.ge|gov\\.gg|gov\\.gh|gov\\.gi|gov\\.gn|gov\\.gr|gov\\.gu|gov\\.hk|gov\\.ie|gov\\.il|gov\\.im|gov\\.in|gov\\.io|gov\\.ir|gov\\.it|gov\\.je|gov\\.jm|gov\\.jo|gov\\.jp|gov\\.kg|gov\\.kh|gov\\.kw|gov\\.ky|gov\\.kz|gov\\.lb|gov\\.lc|gov\\.li|gov\\.lk|gov\\.lr|gov\\.lt|gov\\.lu|gov\\.lv|gov\\.ly|gov\\.ma|gov\\.mg|gov\\.mm|gov\\.mn|gov\\.mo|gov\\.mt|gov\\.mv|gov\\.mw|gov\\.my|gov\\.ng|gov\\.np|gov\\.nr|gov\\.om|gov\\.ph|gov\\.pk|gov\\.pl|gov\\.pr|gov\\.ps|gov\\.pt|gov\\.py|gov\\.qa|gov\\.ru|gov\\.rw|gov\\.sa|gov\\.sb|gov\\.sc|gov\\.sd|gov\\.sg|gov\\.sh|gov\\.sk|gov\\.st|gov\\.sy|gov\\.tj|gov\\.tn|gov\\.to|gov\\.tp|gov\\.tr|gov\\.tt|gov\\.tv|gov\\.tw|gov\\.ua|gov\\.uk|gov\\.ve|gov\\.vi|gov\\.vn|gov\\.ws|gov\\.ye|gov\\.za|gov\\.zm|gov\\.zw|govt\\.nz|gr\\.jp|greta\\.fr|grozny\\.ru|grp\\.lk|gs\\.cn|gsm\\.pl|gub\\.uy|guernsey\\.gg|gunma\\.jp|gv\\.ao|gv\\.at|gx\\.cn|gz\\.cn|h\\.se|ha\\.cn|hb\\.cn|he\\.cn|health\\.vn|herad\\.no|hi\\.cn|hi\\.us|hiroshima\\.jp|hk\\.cn|hl\\.cn|hn\\.cn|hokkaido\\.jp|hotel\\.hu|hotel\\.lk|hu\\.com|huissier-justice\\.fr|hyogo\\.jp|i\\.se|ia\\.us|ibaraki\\.jp|icnet\\.uk|id\\.au|id\\.fj|id\\.ir|id\\.lv|id\\.ly|id\\.us|idf\\.il|idn\\.sg|idrett\\.no|idv\\.hk|idv\\.tw|if\\.ua|il\\.us|imb\\.br|in-addr\\.arpa|in\\.th|in\\.ua|in\\.us|ind\\.br|ind\\.er|ind\\.gg|ind\\.gt|ind\\.in|ind\\.je|ind\\.tn|inf\\.br|inf\\.cu|info\\.au|info\\.az|info\\.co|info\\.cy|info\\.ec|info\\.et|info\\.fj|info\\.ht|info\\.hu|info\\.mv|info\\.nr|info\\.pl|info\\.pr|info\\.ro|info\\.sd|info\\.tn|info\\.tr|info\\.tt|info\\.ve|info\\.vn|ing\\.pa|ingatlan\\.hu|inima\\.al|int\\.ar|int\\.az|int\\.bo|int\\.co|int\\.lk|int\\.mv|int\\.mw|int\\.pt|int\\.ru|int\\.rw|int\\.tj|int\\.tt|int\\.ve|int\\.vn|intl\\.tn|ip6\\.arpa|iris\\.arpa|irkutsk\\.ru|isa\\.us|ishikawa\\.jp|isla\\.pr|it\\.ao|it\\.tt|ivano-frankivsk\\.ua|ivanovo\\.ru|iwate\\.jp|iwi\\.nz|iz\\.hr|izhevsk\\.ru|jamal\\.ru|jar\\.ru|jersey\\.je|jet\\.uk|jl\\.cn|jobs\\.tt|jogasz\\.hu|jor\\.br|joshkar-ola\\.ru|js\\.cn|jx\\.cn|k-uralsk\\.ru|k\\.se|k12\\.ec|k12\\.il|k12\\.tr|kagawa\\.jp|kagoshima\\.jp|kalmykia\\.ru|kaluga\\.ru|kamchatka\\.ru|kanagawa\\.jp|kanazawa\\.jp|karelia\\.ru|kawasaki\\.jp|kazan\\.ru|kchr\\.ru|kemerovo\\.ru|kh\\.ua|khabarovsk\\.ru|khakassia\\.ru|kharkov\\.ua|kherson\\.ua|khmelnitskiy\\.ua|khv\\.ru|kids\\.us|kiev\\.ua|kirov\\.ru|kirovograd\\.ua|kitakyushu\\.jp|km\\.ua|kms\\.ru|kobe\\.jp|kochi\\.jp|koenig\\.ru|komforb\\.se|komi\\.ru|kommunalforbund\\.se|kommune\\.no|komvux\\.se|konyvelo\\.hu|kostroma\\.ru|kr\\.ua|krasnoyarsk\\.ru|ks\\.ua|ks\\.us|kuban\\.ru|kumamoto\\.jp|kurgan\\.ru|kursk\\.ru|kustanai\\.ru|kuzbass\\.ru|kv\\.ua|ky\\.us|kyonggi\\.kr|kyoto\\.jp|la\\.us|lakas\\.hu|lanarb\\.se|lanbib\\.se|law\\.pro|law\\.za|lel\\.br|lg\\.jp|lg\\.ua|lipetsk\\.ru|lkd\\.co\\.im|ln\\.cn|ltd\\.co\\.im|ltd\\.cy|ltd\\.gg|ltd\\.gi|ltd\\.je|ltd\\.lk|ltd\\.uk|lugansk\\.ua|lutsk\\.ua|lviv\\.ua|m\\.se|ma\\.us|magadan\\.ru|magnitka\\.ru|mail\\.pl|maori\\.nz|mari-el\\.ru|mari\\.ru|marine\\.ru|mat\\.br|matsuyama\\.jp|mb\\.ca|md\\.us|me\\.uk|me\\.us|med\\.br|med\\.ec|med\\.ee|med\\.ht|med\\.ly|med\\.om|med\\.pa|med\\.pro|med\\.sa|med\\.sd|medecin\\.fr|media\\.hu|media\\.pl|mi\\.th|mi\\.us|miasta\\.pl|mie\\.jp|mil\\.ac|mil\\.ae|mil\\.ar|mil\\.az|mil\\.bd|mil\\.bo|mil\\.br|mil\\.by|mil\\.co|mil\\.do|mil\\.ec|mil\\.eg|mil\\.er|mil\\.fj|mil\\.ge|mil\\.gh|mil\\.gt|mil\\.gu|mil\\.hn|mil\\.id|mil\\.in|mil\\.io|mil\\.jo|mil\\.kg|mil\\.kh|mil\\.kw|mil\\.kz|mil\\.lb|mil\\.lt|mil\\.lu|mil\\.lv|mil\\.mg|mil\\.mv|mil\\.my|mil\\.no|mil\\.np|mil\\.nz|mil\\.om|mil\\.pe|mil\\.ph|mil\\.pl|mil\\.ru|mil\\.rw|mil\\.se|mil\\.sh|mil\\.sk|mil\\.st|mil\\.tj|mil\\.tr|mil\\.tw|mil\\.uk|mil\\.uy|mil\\.ve|mil\\.ye|mil\\.za|miyagi\\.jp|miyazaki\\.jp|mk\\.ua|mn\\.us|mo\\.cn|mo\\.us|mob\\.nr|mobi\\.tt|mobil\\.nr|mobile\\.nr|mod\\.gi|mod\\.om|mod\\.uk|mordovia\\.ru|mosreg\\.ru|ms\\.us|msk\\.ru|mt\\.us|muni\\.il|murmansk\\.ru|mus\\.br|museum\\.mn|museum\\.mv|museum\\.mw|museum\\.no|museum\\.om|museum\\.tt|music\\.mobi|mytis\\.ru|n\\.se|nagano\\.jp|nagasaki\\.jp|nagoya\\.jp|nakhodka\\.ru|nalchik\\.ru|name\\.ae|name\\.az|name\\.cy|name\\.et|name\\.fj|name\\.hr|name\\.mv|name\\.my|name\\.pr|name\\.tj|name\\.tr|name\\.tt|name\\.vn|nara\\.jp|nat\\.tn|national-library-scotland\\.uk|naturbruksgymn\\.se|navy\\.mil|nb\\.ca|nc\\.us|nd\\.us|ne\\.jp|ne\\.ke|ne\\.kr|ne\\.tz|ne\\.ug|ne\\.us|nel\\.uk|net\\.ac|net\\.ae|net\\.af|net\\.ag|net\\.ai|net\\.al|net\\.an|net\\.ar|net\\.au|net\\.az|net\\.bb|net\\.bd|net\\.bh|net\\.bm|net\\.bn|net\\.bo|net\\.br|net\\.bs|net\\.bt|net\\.bz|net\\.cd|net\\.ch|net\\.ck|net\\.cn|net\\.co|net\\.cu|net\\.cy|net\\.dm|net\\.do|net\\.dz|net\\.ec|net\\.eg|net\\.er|net\\.et|net\\.fj|net\\.fk|net\\.ge|net\\.gg|net\\.gn|net\\.gp|net\\.gr|net\\.gt|net\\.gu|net\\.hk|net\\.hn|net\\.ht|net\\.id|net\\.il|net\\.im|net\\.in|net\\.io|net\\.ir|net\\.je|net\\.jm|net\\.jo|net\\.jp|net\\.kg|net\\.kh|net\\.kw|net\\.ky|net\\.kz|net\\.la|net\\.lb|net\\.lc|net\\.li|net\\.lk|net\\.lr|net\\.lu|net\\.lv|net\\.ly|net\\.ma|net\\.mm|net\\.mo|net\\.mt|net\\.mv|net\\.mw|net\\.mx|net\\.my|net\\.na|net\\.nc|net\\.ng|net\\.ni|net\\.np|net\\.nr|net\\.nz|net\\.om|net\\.pa|net\\.pe|net\\.pg|net\\.ph|net\\.pk|net\\.pl|net\\.pr|net\\.ps|net\\.pt|net\\.py|net\\.qa|net\\.ru|net\\.rw|net\\.sa|net\\.sb|net\\.sc|net\\.sd|net\\.sg|net\\.sh|net\\.st|net\\.sy|net\\.th|net\\.tj|net\\.tn|net\\.tr|net\\.tt|net\\.tw|net\\.ua|net\\.uk|net\\.uy|net\\.ve|net\\.vi|net\\.vn|net\\.vu|net\\.ws|net\\.ye|net\\.za|news\\.hu|nf\\.ca|ngo\\.lk|ngo\\.ph|ngo\\.pl|ngo\\.za|nh\\.us|nhs\\.uk|nic\\.im|nic\\.in|nic\\.tt|nic\\.uk|nieruchomosci\\.pl|niigata\\.jp|nikolaev\\.ua|nj\\.us|nkz\\.ru|nl\\.ca|nls\\.uk|nm\\.cn|nm\\.us|nnov\\.ru|no\\.com|nom\\.ad|nom\\.ag|nom\\.br|nom\\.co|nom\\.es|nom\\.fk|nom\\.fr|nom\\.mg|nom\\.ni|nom\\.pa|nom\\.pe|nom\\.pl|nom\\.re|nom\\.ro|nom\\.ve|nom\\.za|nome\\.pt|norilsk\\.ru|not\\.br|notaires\\.fr|nov\\.ru|novosibirsk\\.ru|ns\\.ca|nsk\\.ru|nsn\\.us|nsw\\.au|nt\\.au|nt\\.ca|nt\\.ro|ntr\\.br|nu\\.ca|nv\\.us|nx\\.cn|ny\\.us|o\\.se|od\\.ua|odessa\\.ua|odo\\.br|off\\.ai|og\\.ao|oh\\.us|oita\\.jp|ok\\.us|okayama\\.jp|okinawa\\.jp|omsk\\.ru|on\\.ca|or\\.at|or\\.cr|or\\.id|or\\.jp|or\\.ke|or\\.kr|or\\.th|or\\.tz|or\\.ug|or\\.us|orenburg\\.ru|org\\.ac|org\\.ae|org\\.ag|org\\.ai|org\\.al|org\\.an|org\\.ar|org\\.au|org\\.az|org\\.bb|org\\.bd|org\\.bh|org\\.bm|org\\.bn|org\\.bo|org\\.br|org\\.bs|org\\.bt|org\\.bw|org\\.bz|org\\.cd|org\\.ch|org\\.ck|org\\.cn|org\\.co|org\\.cu|org\\.cy|org\\.dm|org\\.do|org\\.dz|org\\.ec|org\\.ee|org\\.eg|org\\.er|org\\.es|org\\.et|org\\.fj|org\\.fk|org\\.ge|org\\.gg|org\\.gh|org\\.gi|org\\.gn|org\\.gp|org\\.gr|org\\.gt|org\\.gu|org\\.hk|org\\.hn|org\\.ht|org\\.hu|org\\.il|org\\.im|org\\.in|org\\.io|org\\.ir|org\\.je|org\\.jm|org\\.jo|org\\.jp|org\\.kg|org\\.kh|org\\.kw|org\\.ky|org\\.kz|org\\.la|org\\.lb|org\\.lc|org\\.li|org\\.lk|org\\.lr|org\\.ls|org\\.lu|org\\.lv|org\\.ly|org\\.ma|org\\.mg|org\\.mk|org\\.mm|org\\.mn|org\\.mo|org\\.mt|org\\.mv|org\\.mw|org\\.mx|org\\.my|org\\.na|org\\.nc|org\\.ng|org\\.ni|org\\.np|org\\.nr|org\\.nz|org\\.om|org\\.pa|org\\.pe|org\\.pf|org\\.ph|org\\.pk|org\\.pl|org\\.pr|org\\.ps|org\\.pt|org\\.py|org\\.qa|org\\.ro|org\\.ru|org\\.sa|org\\.sb|org\\.sc|org\\.sd|org\\.se|org\\.sg|org\\.sh|org\\.st|org\\.sv|org\\.sy|org\\.tj|org\\.tn|org\\.tr|org\\.tt|org\\.tw|org\\.ua|org\\.uk|org\\.uy|org\\.ve|org\\.vi|org\\.vn|org\\.vu|org\\.ws|org\\.ye|org\\.yu|org\\.za|org\\.zm|org\\.zw|oryol\\.ru|osaka\\.jp|oskol\\.ru|otc\\.au|oz\\.au|pa\\.us|palana\\.ru|parliament\\.cy|parliament\\.uk|parti\\.se|pb\\.ao|pc\\.pl|pe\\.ca|pe\\.kr|penza\\.ru|per\\.kh|per\\.sg|perm\\.ru|perso\\.ht|pharmacien\\.fr|pl\\.ua|plc\\.co\\.im|plc\\.ly|plc\\.uk|plo\\.ps|pol\\.dz|pol\\.ht|pol\\.tr|police\\.uk|poltava\\.ua|port\\.fr|powiat\\.pl|pp\\.az|pp\\.ru|pp\\.se|ppg\\.br|prd\\.fr|prd\\.mg|press\\.cy|press\\.ma|press\\.se|presse\\.fr|pri\\.ee|principe\\.st|priv\\.at|priv\\.hu|priv\\.no|priv\\.pl|pro\\.ae|pro\\.br|pro\\.cy|pro\\.ec|pro\\.fj|pro\\.ht|pro\\.mv|pro\\.om|pro\\.pr|pro\\.tt|pro\\.vn|psc\\.br|psi\\.br|pskov\\.ru|ptz\\.ru|pub\\.sa|publ\\.pt|pvt\\.ge|pyatigorsk\\.ru|qc\\.ca|qc\\.com|qh\\.cn|qld\\.au|qsl\\.br|re\\.kr|realestate\\.pl|rec\\.br|rec\\.co|rec\\.ro|rec\\.ve|red\\.sv|reklam\\.hu|rel\\.ht|rel\\.pl|res\\.in|ri\\.us|rnd\\.ru|rnrt\\.tn|rns\\.tn|rnu\\.tn|rovno\\.ua|ru\\.com|rubtsovsk\\.ru|rv\\.ua|ryazan\\.ru|s\\.se|sa\\.au|sa\\.com|sa\\.cr|saga\\.jp|saitama\\.jp|sakhalin\\.ru|samara\\.ru|saotome\\.st|sapporo\\.jp|saratov\\.ru|sark\\.gg|sc\\.cn|sc\\.ke|sc\\.ug|sc\\.us|sch\\.ae|sch\\.gg|sch\\.ir|sch\\.je|sch\\.lk|sch\\.ly|sch\\.ng|sch\\.om|sch\\.sa|sch\\.sd|sch\\.uk|sch\\.zm|school\\.fj|school\\.nz|school\\.za|sci\\.eg|sd\\.cn|sd\\.us|se\\.com|se\\.tt|sebastopol\\.ua|sec\\.ps|sendai\\.jp|seoul\\.kr|sex\\.hu|sex\\.pl|sh\\.cn|shiga\\.jp|shimane\\.jp|shizuoka\\.jp|shop\\.ht|shop\\.hu|shop\\.pl|simbirsk\\.ru|sk\\.ca|sklep\\.pl|sld\\.do|sld\\.pa|slg\\.br|smolensk\\.ru|sn\\.cn|snz\\.ru|soc\\.lk|soros\\.al|sos\\.pl|spb\\.ru|sport\\.hu|srv\\.br|sshn\\.se|stat\\.no|stavropol\\.ru|store\\.co|store\\.ro|store\\.st|store\\.ve|stv\\.ru|suli\\.hu|sumy\\.ua|surgut\\.ru|sx\\.cn|syzran\\.ru|szex\\.hu|szkola\\.pl|t\\.se|takamatsu\\.jp|tambov\\.ru|targi\\.pl|tas\\.au|tatarstan\\.ru|te\\.ua|tec\\.ve|tel\\.no|tel\\.nr|tel\\.tr|telecom\\.na|telememo\\.au|ternopil\\.ua|test\\.ru|tirana\\.al|tj\\.cn|tlf\\.nr|tm\\.cy|tm\\.fr|tm\\.hu|tm\\.mc|tm\\.mg|tm\\.mt|tm\\.pl|tm\\.ro|tm\\.se|tm\\.za|tmp\\.br|tn\\.us|tochigi\\.jp|tokushima\\.jp|tokyo\\.jp|tom\\.ru|tomsk\\.ru|tottori\\.jp|tourism\\.pl|tourism\\.tn|toyama\\.jp|tozsde\\.hu|travel\\.pl|travel\\.tt|trd\\.br|tsaritsyn\\.ru|tsk\\.ru|tula\\.ru|tur\\.br|turystyka\\.pl|tuva\\.ru|tv\\.bo|tv\\.br|tv\\.sd|tver\\.ru|tw\\.cn|tx\\.us|tyumen\\.ru|u\\.se|udm\\.ru|udmurtia\\.ru|uk\\.com|uk\\.net|uk\\.tt|ulan-ude\\.ru|unam\\.na|uniti\\.al|upt\\.al|uri\\.arpa|urn\\.arpa|us\\.com|us\\.tt|ut\\.us|utazas\\.hu|utsunomiya\\.jp|uu\\.mt|uy\\.com|uzhgorod\\.ua|va\\.us|vatican\\.va|vdonsk\\.ru|vet\\.br|veterinaire\\.fr|vgs\\.no|vic\\.au|video\\.hu|vinnica\\.ua|vladikavkaz\\.ru|vladimir\\.ru|vladivostok\\.ru|vn\\.ua|volgograd\\.ru|vologda\\.ru|voronezh\\.ru|vrn\\.ru|vt\\.us|vyatka\\.ru|w\\.se|wa\\.au|wa\\.us|wakayama\\.jp|weather\\.mobi|web\\.co|web\\.do|web\\.lk|web\\.pk|web\\.tj|web\\.tr|web\\.ve|web\\.za|wi\\.us|wv\\.us|www\\.ro|wy\\.us|x\\.se|xj\\.cn|xz\\.cn|y\\.se|yakutia\\.ru|yamagata\\.jp|yamaguchi\\.jp|yamal\\.ru|yamanashi\\.jp|yaroslavl\\.ru|yekaterinburg\\.ru|yk\\.ca|yn\\.cn|yokohama\\.jp|yuzhno-sakhalinsk\\.ru|z\\.se|za\\.com|zaporizhzhe\\.ua|zgrad\\.ru|zhitomir\\.ua|zj\\.cn|zlg\\.br|zp\\.ua|zt\\.ua)$";

    private Pattern httpPattern = Pattern.compile(HTTP_URL_REGEX,
            Pattern.CASE_INSENSITIVE);

    private Pattern ipPattern = Pattern.compile(IP_REGEX,
            Pattern.CASE_INSENSITIVE);

    private Pattern domainPattern = Pattern.compile(DOMAIN_REGEX,
            Pattern.CASE_INSENSITIVE);

    private Pattern tldPattern = Pattern.compile(TLD_REGEX,
            Pattern.CASE_INSENSITIVE);

    private Log logger;

    private List<String> rblList;

    private DNSService dns;

    @Required
    public void setLog(Log logger) {
        this.logger = logger;
    }

    @Required
    public void setSurblServerList(List<String> rblList) {
        this.rblList = rblList;
    }

    @Required
    public void setDnsService(DNSService dns) {
        this.dns = dns;
    }

    @Override
    public void body(BodyDescriptor bd, InputStream is) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, bd
                .getCharset()));

        String line = null;

        while ((line = reader.readLine()) != null) {

            if (checkSurbl(getDomainOrReverseIpAddress(line))) {
                listed = true;
                break;
            }
        }

        reader.close();
    }

    /**
     * Extract the domain or ipAddress from the given text
     * 
     * @param text
     * @return
     */
    private String getDomainOrReverseIpAddress(String text) {
        logger.debug("Check line: " + text);
        Matcher httpMatcher = httpPattern.matcher(text);

        if (httpMatcher.matches()) {
            String domain;

            String rawUrl = httpMatcher.group(1);

            int index = rawUrl.indexOf("/");

            if (index > -1) {
                // just cut of any directory specified in the url
                domain = rawUrl.substring(0, index);
            } else {
                domain = rawUrl;

            }

            Matcher tldMatcher = tldPattern.matcher(domain);
            Matcher ipMatcher = ipPattern.matcher(domain);

            if (tldMatcher.matches()) {
                String[] args = tldMatcher.group(0).split("\\.");
                int i = args.length;

                String tldDomain = args[i - 3] + "." + args[i - 2] + "."
                        + args[i - 1];

                logger.debug("Valid domain " + tldDomain);

                return tldDomain;
            } else if (ipMatcher.matches()) {

                logger.debug("Valid ipAddress: " + domain);

                // return the ip reversed
                return ipMatcher.group(4) + "." + ipMatcher.group(3) + "."
                        + ipMatcher.group(2) + "." + ipMatcher.group(1);

            } else {
                Matcher domainMatcher = domainPattern.matcher(domain);

                if (domainMatcher.matches()) {

                    // Strip sbudomains and return the domain
                    return domainMatcher.group(1);

                } else if (domain.split("\\.").length == 2) {

                    // No subdomain to strip, just return the extracted domain
                    return domain;
                } else {

                    // The domain/ipAddress in the url is not valid at all
                    logger.debug("Unknown tld or ipAddress: " + domain);
                }
            }

        }
        return null;
    }

    /**
     * Return true if a domain or ipAddress was found which is listed on any
     * configured surbl servers
     * 
     * @param text
     *                the ipAddress or domain to check against surbl
     * @return true if listed domain or ipAddress was found
     */
    private boolean checkSurbl(String text) {
        if (text != null) {
            logger.debug("Check domain/ipAddress: " + text);

            for (int i = 0; i < rblList.size(); i++) {
                try {
                    String rawEntry = text + "." + rblList.get(i);
                    dns.getByName(rawEntry);

                    // If we reach this block the domain or ipAddress is listed
                    logger.debug("Entry listed: " + rawEntry);

                    return true;
                } catch (UnknownHostException e) {
                    // Not listed, just ignore
                }
            }
        }
        return false;
    }

    /**
     * Return true if a domain or ipAddress was found which is listed on any
     * configured surbl servers
     * 
     * @return true if listed domain or ipAddress was found
     */
    public boolean isSurblListed() {
        return listed;
    }

}
